Conversation
| RUN pip3 install -r requirements.txt | ||
|
|
||
| WORKDIR /root | ||
| RUN git clone https://github.com/PKWadsy/cares_pokemon_configs.git cares_rl_configs |
There was a problem hiding this comment.
Can probably pull from Google Drive with some reconfiguration.
There was a problem hiding this comment.
yea, we should be putting ROMs on github
| pydantic==1.10.13 | ||
| torch==2.3.1 | ||
| pyboy==2.2.1 | ||
| pyboy==2.2.2 |
There was a problem hiding this comment.
Test if v2.5.1 is usable.
| return self.env.reset() | ||
|
|
||
| def step(self, action: int) -> tuple: | ||
| # debug-log logging.info("Logging109") |
There was a problem hiding this comment.
Remove this line, along with the logging import.
| """ | ||
|
|
||
| import logging | ||
| import os |
There was a problem hiding this comment.
Possible removal?
| WORKDIR /workspace/cares_reinforcement_learning | ||
| RUN git checkout -t origin/action-info-logging |
There was a problem hiding this comment.
The base Docker image for everything should be built off the release versions.
| RUN pip3 install -r requirements.txt | ||
|
|
||
| WORKDIR /root | ||
| RUN git clone https://github.com/PKWadsy/cares_pokemon_configs.git cares_rl_configs |
There was a problem hiding this comment.
yea, we should be putting ROMs on github
| @abc.abstractmethod | ||
| def action_as_string(self, action): | ||
| raise NotImplemented("Override this method") | ||
|
|
There was a problem hiding this comment.
Why is this not implemented for all the other tasks?
Remove this at this level.
| # Horrible hack so I don't have to change all the algorithms | ||
| select_action_from_policy = agent.select_action_from_policy | ||
|
|
||
| if "info" in inspect.signature(select_action_from_policy).parameters: | ||
| normalised_action = select_action_from_policy( | ||
| state, noise_scale=noise_scale, info=step_data | ||
| ) | ||
| else: | ||
| normalised_action = select_action_from_policy( | ||
| state, noise_scale=noise_scale | ||
| ) |
| domain: Optional[str] = "" | ||
| display: Optional[int] = 0 |
There was a problem hiding this comment.
Remove the redundant `Optional`.
| record.stop_video() | ||
| video_dir = os.path.join(record.directory, "videos") | ||
| data_dir = os.path.join(record.directory, "data") | ||
|
|
||
| run_csv = os.path.join(data_dir, f"episode_{episode_num}.csv") | ||
| pd.DataFrame(run_data_rows).to_csv(run_csv, index=False) | ||
|
|
||
| if episode_reward > highest_reward: | ||
|
|
||
| highest_reward = episode_reward | ||
|
|
||
| new_record_video = os.path.join( | ||
| video_dir, f"new_record_episode_{episode_num+1}.mp4" | ||
| ) | ||
| training_video = os.path.join(video_dir, "temp_train_video.mp4") | ||
|
|
||
| logging.info( | ||
| f"New highest reward of {episode_reward}. Saving video and run data..." | ||
| ) | ||
|
|
||
| try: | ||
| os.rename(training_video, new_record_video) | ||
| except: | ||
| logging.error("An error renaming the video occured :/") |
scripts/train_loops/policy_loop.py
Outdated
| if (total_step_counter + 1) % number_steps_per_evaluation == 0: | ||
| logging.info("*************--Evaluation Loop--*************") | ||
| evaluate_policy_network( | ||
| env_eval, | ||
| agent, | ||
| train_config, | ||
| record=record, | ||
| total_steps=total_step_counter, | ||
| normalisation=normalisation, | ||
| ) | ||
| logging.info("--------------------------------------------") |
There was a problem hiding this comment.
nope - this does not get removed
There was a problem hiding this comment.
Delete every single change made to policy loop - not happening
Adds